Partie 1 : Analyse descriptive

Unique French names

## Warning: Removed 1 rows containing missing values (geom_path).

Working with department data


Project

Hugo Broucke

07/12/2019

Part 1 : Descriptive analysis

Unique French names

Evolution over time

# Turn the year column of the national data into a Date set to 31 December
# of that year: pad the value to 5 characters, replace the padding space
# with "-31-12", then parse the result as year-day-month.
# Values that do not parse come out as NA.
data_nat_clean$year <- data_nat_clean$year %>%
  str_pad(5, "right") %>%
  str_replace(" ", "-31-12") %>%
  as.Date(format = "%Y-%d-%m")

# Number of different first names recorded for each year.
distinct_names <- data_nat_clean %>%
  group_by(year) %>%
  summarise(n_names = n_distinct(firstname))

# Line chart of the yearly count of distinct first names.
plot_distinct_names <- ggplot(distinct_names, aes(x = year, y = n_names)) +
  geom_line(size = 1.2, color = "blue") +
  xlab("Year") +
  ylab("Number of unique names") +
  ggtitle("Number of French unique names from 1900 to 2018") +
  theme(plot.title = element_text(hjust = 0.5))

plot_distinct_names

Difference from one year to the next over time


Part 2

# Yearly counts for the first names of three players of the 1998 French
# football team, with a marker on 1998.
# `zinedine` is kept as a plain copy of the national data (no year filter).
zinedine <- data_nat_clean
zinedine1 <- zinedine %>%
  filter(firstname %in% c("ZINEDINE", "BIXENTE", "YOURI"))

# The event marker is added with a parameter-form geom_vline() and
# annotate() so that it is drawn once; geom_label()/geom_vline(aes(...))
# with constant positions inherit the plot data and draw one copy per row.
graph <- ggplot(zinedine1, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1.5) +
  ggtitle("Plot of number of name by years for football player in 1998") +
  xlab("Year") +
  ylab("Number of name") +
  geom_vline(
    xintercept = as.numeric(as.Date("1998-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("1998-01-01"), y = 180,
    label = "1998 Football world cup", colour = "black"
  )
graph

#CM18 <- data_nat_clean %>% filter(year(year) > 2010 )
#CM18 <- CM18  %>% filter(firstname == "ANTOINE"  | firstname=="KYLIAN" | firstname=="BLAISE" | firstname=="BENJAMIN" | firstname=="SAMUEL" | firstname=="HUGO")
#graph <- ggplot(CM18, aes(x = year, y = number, colour = firstname))+ 
 # geom_line(size=1.5) + ggtitle("Plot of number of name by years for football player IN 2018") +
  #xlab("Year") + ylab("Number of name")
#graph

# Yearly counts, after 2004, for three first names of Game of Thrones
# characters, with a marker on 2011 (season 1).
got <- data_nat_clean %>%
  filter(firstname %in% c("BRAN", "SANSA", "DAENERYS")) %>%
  filter(year(year) > 2004)

# The event marker is added with a parameter-form geom_vline() and
# annotate() so that it is drawn once; geom_label()/geom_vline(aes(...))
# with constant positions inherit the plot data and draw one copy per row.
graph <- ggplot(got, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1.5) +
  ggtitle("Plot of number of name by years in link with Game of throne") +
  xlab("Year") +
  ylab("Number of name") +
  geom_vline(
    xintercept = as.numeric(as.Date("2011-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("2011-01-01"), y = 20,
    label = "Game of throne, season 1", colour = "black"
  )
graph

# Yearly counts, after 1990, for first names of film characters.
# ARWEN is restricted to sex "2"; the other names are taken for both sexes.
arwen <- data_nat_clean %>%
  filter(firstname == "ARWEN" & sex == "2")
cinema <- data_nat_clean %>%
  filter(firstname %in% c("NEO", "BELLA", "ANAKIN"))
cinema <- bind_rows(arwen, cinema) %>%
  filter(year(year) > 1990)

# Event markers are added with parameter-form geom_vline() and annotate()
# so that each is drawn once; geom_label()/geom_vline(aes(...)) with
# constant positions inherit the plot data and draw one copy per row.
# The "Matrix" and "Star Wars: Episode I " labels share the 1999 line.
graph <- ggplot(cinema, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1.5) +
  ggtitle("Plot of number of name by years in link with the cinema") +
  xlab("Year") +
  ylab("Number of name") +
  geom_vline(
    xintercept = as.numeric(as.Date("1999-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("1999-01-01"), y = 25,
    label = "Matrix", colour = "black", size = 3
  ) +
  geom_vline(
    xintercept = as.numeric(as.Date("2009-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("2009-01-01"), y = 105,
    label = "Twilight", colour = "black", size = 3
  ) +
  annotate(
    "label",
    x = as.Date("1999-01-01"), y = 100,
    label = "Star Wars: Episode I ", colour = "black", size = 3
  ) +
  geom_vline(
    xintercept = as.numeric(as.Date("2001-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("2001-01-01"), y = 175,
    label = "The Lord of the rings ", colour = "black", size = 3
  )
graph

# Yearly counts between 1910 and 1950 for first names linked to the two
# world wars. ADOLPHE is restricted to sex "1" and VICTOIRE to sex "2";
# the other names are taken for both sexes.
adolphe <- data_nat_clean %>%
  filter(firstname == "ADOLPHE" & sex == "1") %>%
  filter(year(year) > 1909 & year(year) < 1951)
victoire <- data_nat_clean %>%
  filter(firstname == "VICTOIRE" & sex == "2") %>%
  filter(year(year) > 1909 & year(year) < 1951)

ww <- data_nat_clean %>%
  filter(firstname %in% c("JOFFRE", "JOFFRETTE", "ADOLPHINE")) %>%
  filter(year(year) > 1909 & year(year) < 1951)
ww <- bind_rows(ww, adolphe, victoire)

# Event markers are added with parameter-form geom_vline() and annotate()
# so that each is drawn once; geom_label()/geom_vline(aes(...)) with
# constant positions inherit the plot data and draw one copy per row.
# The 1945 label previously read "End of the World II" (missing "War").
graph <- ggplot(ww, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1) +
  ggtitle("Plot of number of name by years in link with the world wars") +
  xlab("Year") +
  ylab("Number of name") +
  geom_vline(
    xintercept = as.numeric(as.Date("1914-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("1914-01-01"), y = 0,
    label = "Marne's Battle", colour = "black", size = 2.5
  ) +
  geom_vline(
    xintercept = as.numeric(as.Date("1921-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("1921-01-01"), y = 550,
    label = "Hitler leader of the NSDAP", colour = "black", size = 2.5
  ) +
  geom_vline(
    xintercept = as.numeric(as.Date("1918-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("1918-01-01"), y = 500,
    label = "End of the World War I", colour = "black", size = 2.5
  ) +
  geom_vline(
    xintercept = as.numeric(as.Date("1945-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("1945-01-01"), y = 200,
    label = "End of the World War II", colour = "black", size = 2.5
  )
graph

# Yearly counts for first names that appear in song titles
# (FELICIE, ROXANNE, OSCAR, LOLITA), over all available years.
# NOTE(review): the result is still stored in `got`, overwriting the
# Game of Thrones subset built earlier; the name is kept so that any code
# relying on it keeps working.
got <- data_nat_clean %>%
  filter(firstname %in% c("FELICIE", "ROXANNE", "OSCAR", "LOLITA"))

# The title used to be a copy of the Game of Thrones plot title; it now
# describes the names actually plotted.
graph <- ggplot(got, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1.5) +
  ggtitle("Plot of number of name by years in link with music") +
  xlab("Year") +
  ylab("Number of name")
graph

# Turn the year column of the department data into a Date set to
# 31 December of that year (pad to 5 characters, replace the padding space
# with "-31-12", parse as year-day-month). Unparseable values become NA.
data_dpt_clean$year <- data_dpt_clean$year %>%
  str_pad(5, "right") %>%
  str_replace(" ", "-31-12") %>%
  as.Date(format = "%Y-%d-%m")

# Keep 1990-2012 and replace each department code by a region label, then
# expose that column as `Region`. Codes not listed below become NA.
# The labels are kept exactly as written (including the trailing space in
# "PACA "), because `dep` is later merged with `eco` on `Region`.
dep <- data_dpt_clean %>%
  filter(year(year) > 1989 & year(year) < 2013) %>%
  mutate(
    department = case_when(
      department %in% c("95", "78", "91", "77", "94", "92", "93", "75") ~
        "Ile-de-France",
      department %in% c("08", "51", "10", "52") ~ "Champagne-Ardenne",
      department %in% c("02", "60", "80") ~ "Picardie",
      department %in% c("76", "27") ~ "Haute-normandie",
      department %in% c("18", "28", "36", "37", "41", "45") ~ "Centre",
      department %in% c("14", "50", "61") ~ "Basse-Normandie",
      department %in% c("21", "58", "71", "89") ~ "Bourgogne",
      department %in% c("59", "62") ~ "Nord-Pas-de-Calais",
      department %in% c("54", "55", "57", "88") ~ "Lorraine",
      department %in% c("67", "68") ~ "Alsace",
      department %in% c("25", "39", "70", "90") ~ "Franche-Comté",
      department %in% c("44", "49", "53", "72", "85") ~ "Pays de la Loire",
      department %in% c("22", "29", "35", "56") ~ "Bretagne",
      department %in% c("16", "17", "79", "86") ~ "Poitou-CharenteS",
      department %in% c("24", "33", "40", "47", "64") ~ "Aquitaine",
      department %in% c("09", "12", "31", "32", "46", "65", "81", "82") ~
        "Midi-Pyrenées",
      department %in% c("19", "23", "87") ~ "Limousin",
      department %in% c("01", "07", "26", "38", "42", "69", "73", "74") ~
        "Rhone-Alpes",
      department %in% c("03", "15", "43", "63") ~ "Auvergne",
      department %in% c("11", "30", "34", "48", "66") ~
        "Languedoc-Roussillon",
      department %in% c("04", "05", "06", "13", "83", "84") ~ "PACA ",
      department %in% c("20") ~ "Corse",
      department %in% c("971", "972", "973", "974") ~ "Overseas territories"
    )
  ) %>%
  rename(Region = department)

# Most given first name(s) per region in 1990, for boys (sex 1) and girls
# (sex 2), joined with the `eco` table on `Region`. Ties for the maximum
# are all kept.
# NOTE(review): `eco` is defined outside this section; X1990 is presumably
# its regional GDP figure for 1990 - confirm against the eco source.
eco1990boy <- dep %>%
  filter(sex == 1, year(year) == 1990) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup() %>%
  merge(eco, by = "Region")
eco1990girl <- dep %>%
  filter(sex == 2, year(year) == 1990) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup() %>%
  merge(eco, by = "Region")
eco1990 <- bind_rows(eco1990boy, eco1990girl)

# Axis labels now match the mapped variables (x = firstname, y = X1990);
# they previously read "GDP" on x and "Number of name" on y.
graph <- ggplot(eco1990boy, aes(x = firstname, y = X1990, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular boys firstnames in 1990 by Region and GDP") +
  xlab("First name") +
  ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

# Same plot for girls; the title previously said "boys".
graph <- ggplot(eco1990girl, aes(x = firstname, y = X1990, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular girls firstnames in 1990 by Region and GDP") +
  xlab("First name") +
  ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

# Most given first name(s) per region in 2000, for boys (sex 1) and girls
# (sex 2), joined with the `eco` table on `Region`. Ties for the maximum
# are all kept.
# NOTE(review): `eco` is defined outside this section; X2000 is presumably
# its regional GDP figure for 2000 - confirm against the eco source.
eco2000boy <- dep %>%
  filter(sex == 1, year(year) == 2000) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup() %>%
  merge(eco, by = "Region")
eco2000girl <- dep %>%
  filter(sex == 2, year(year) == 2000) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup() %>%
  merge(eco, by = "Region")
eco2000 <- bind_rows(eco2000boy, eco2000girl)

# Axis labels now match the mapped variables (x = firstname, y = X2000);
# they previously read "GDP" on x and "Number of name" on y.
graph <- ggplot(eco2000boy, aes(x = firstname, y = X2000, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular boys firstnames in 2000 by Region and GDP") +
  xlab("First name") +
  ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

# Same plot for girls; the title previously said "boys".
graph <- ggplot(eco2000girl, aes(x = firstname, y = X2000, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular girls firstnames in 2000 by Region and GDP") +
  xlab("First name") +
  ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

# Most given first name(s) per region in 2010, for boys (sex 1) and girls
# (sex 2), joined with the `eco` table on `Region`. Ties for the maximum
# are all kept.
# NOTE(review): `eco` is defined outside this section; X2010 is presumably
# its regional GDP figure for 2010 - confirm against the eco source.
eco2010boy <- dep %>%
  filter(sex == 1, year(year) == 2010) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup() %>%
  merge(eco, by = "Region")
eco2010girl <- dep %>%
  filter(sex == 2, year(year) == 2010) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup() %>%
  merge(eco, by = "Region")
eco2010 <- bind_rows(eco2010boy, eco2010girl)

# Axis labels now match the mapped variables (x = firstname, y = X2010);
# they previously read "GDP" on x and "Number of name" on y.
graph <- ggplot(eco2010boy, aes(x = firstname, y = X2010, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular boys firstnames in 2010 by Region and GDP") +
  xlab("First name") +
  ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

# Same plot for girls; the title previously said "boys".
graph <- ggplot(eco2010girl, aes(x = firstname, y = X2010, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular girls firstnames in 2010 by Region and GDP") +
  xlab("First name") +
  ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

The popularity of French Names

In this part we will study which events have an impact on the popularity of French first names.

French names and sport

In this part we will study the impact of sport on the popularity of French first names. To illustrate this example, we chose to study the most famous sports event in France: the 1998 World Cup. This World Cup was won by the French for the first time.

In this graph, we see that the first names of three players of the French team at the 1998 World Cup show a peak of popularity after France's victory. We can therefore assume that major sports events have an impact on French first names.

French names and series

The next graph highlights the impact of TV series on French first names. To illustrate this, we chose one of the best-known series in France: Game of Thrones. We look at the names of the famous characters Bran, Daenerys and Sansa.

We even observe that the names Daenerys and Sansa appeared just after the series was first broadcast.

French names and films

In the same spirit as the previous part, we will now observe the impact of films on French first names. For that we selected the film The Matrix with the character of Neo, the film Star Wars: Episode I with the character of Anakin, the film The Lord of the Rings with Arwen and finally the film Twilight with the name Bella. Except for the name Bella, these names appeared in France just after the release of these films. The cinema therefore has an impact on French first names.

French names and world wars

One of the most tragic periods in recent French history is that of the two world wars, so we decided to look at their impact on French first names. One of the heroes of the First World War is Marshal Joffre, who won the Battle of the Marne. We can see that this victory had an impact on French first names, since Joffre and its feminine form Joffrette show a peak of popularity just after this battle. The name Victoire, which means victory in French, shows two peaks, in 1918 and 1945, the years in which the two wars ended. On the other hand, the popularity of the name Adolphe and of its feminine derivative Adolphine plummets when Adolf Hitler comes to power in Germany.

French names and music

In this part, we will study the impact of music on French first names. We selected three songs for this. The first is Roxanne by The Police: released in 1978, this song increased the popularity of the name Roxanne in France. Similarly, the song Oscar by Renaud, released in 1981, made the popularity of the name Oscar explode. Finally, the song Moi… Lolita by Alizée created a new peak of popularity for the name Lolita.

French names and economy

In this part we will observe whether there is a correlation between French first names and the economy of the regions. For that we studied the names most frequently given to boys and to girls in 1990, 2000 and 2010 in each French region.

In 1990 we notice that Île-de-France, the richest region, has a most popular name that differs from those of the other French regions. This is the case for both male and female first names.

The same phenomenon is observed in 2000.

In 2010, the richest region still has different names from the other regions. With these observations, it is difficult to say that the economy has an impact on French first names: although the most popular names in the richest region differ from the rest, we do not observe any such difference for the poorest region.